In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import os
import matplotlib.pyplot as plt
from datetime import datetime, date
from scipy.stats import pearsonr, spearmanr, zscore
import statsmodels.api as sm
from itertools import combinations, permutations
import warnings

warnings.filterwarnings('ignore')

plt.rcParams['font.sans-serif']=['SimHei'] #用来正常显示中文标签
plt.rcParams['axes.unicode_minus'] = False #用来正常显示负号
In [2]:
# 数据读取、清洗

def clean_data(df_name):
    index_count = df_name.shape[0]
    drop_columns = set()
    for column in df_name.columns:
        missing_count = df_name[column].isna().sum()
        if missing_count/index_count > 0.5:
            drop_columns.add(column)
        else:
            df_name[column].interpolate(inplace=True)
            df_name[column].fillna(df_name[column].loc[df_name[column].first_valid_index()],inplace=True)
    return drop_columns


class DataConstruct:
    drop_columns = {'000046.SZ', '002002.SZ', '000666.SZ', '002013.SZ'}

    def __init__(self, data_path):
        self.data_path = data_path
        self.data_space = self.cal_return()

    def read_data(self):
        file_names = os.listdir(self.data_path)
        data_space = []
        for file_name in file_names:
            if file_name.endswith('.csv'):
                file_path = os.path.join(self.data_path, file_name)
                df_name = file_name[:-4]
                globals()[df_name] = pd.read_csv(file_path)
                if df_name=='RESSET_FINRATIO':
                    globals()[df_name]['TRADE_DT'] = pd.to_datetime(globals()[df_name]['TRADE_DT'], format='%Y/%m/%d')
                else:
                    globals()[df_name]['TRADE_DT'] = pd.to_datetime(globals()[df_name]['TRADE_DT'],format='%Y%m%d')
                globals()[df_name]['TRADE_DT'] = globals()[df_name]['TRADE_DT'].dt.to_period('M')
                if df_name in ['RevenueTechnicalFactor1', 'RESSET_FINRATIO']:
                    for column in globals()[df_name].columns[2:]:
                        data_space.append(column)
                        globals()[column] = globals()[df_name].pivot_table(index='TRADE_DT', columns='S_INFO_WINDCODE', values=column)
                else:
                    data_space.append(df_name)
                    globals()[df_name].set_index('TRADE_DT',inplace=True)
        return data_space

    def process_data_get_factors(self):
        data_space = self.read_data()
        for df_name in data_space:
            DataConstruct.drop_columns = DataConstruct.drop_columns | clean_data(globals()[df_name])
        for df_name in data_space:
            to_drop = DataConstruct.drop_columns & set(globals()[df_name].columns)# & set(globals()['industry'].index)
            globals()[df_name].drop(columns=to_drop,inplace=True)
        globals()['pe_ttm'] = globals()['pe_ttm'].apply(lambda x: 1.0000/x)
        globals()['mv'] = globals()['mv'].apply(np.log)
        return data_space

    def cal_return(self):
        data_space = self.process_data_get_factors()
        global stock_return, stock_return_next
        stock_return = globals()['close'].pct_change()
        stock_return_next = stock_return.shift(periods=-1)
        data_space.extend(['stock_return','stock_return_next'])
        return data_space

    def write_code_list(self):
        df = pd.DataFrame(globals()['pe_ttm'].columns.tolist())
        df.to_csv('code_list1.csv', index=False, header=False)
        return 0

    def print_all(self):
        for df_name in self.data_space:
            print('{%s} (%d * %d):' % (df_name,globals()[df_name].shape[0],globals()[df_name].shape[1]))
            print(globals()[df_name].iloc[:5,:5])
            print('\n')
In [3]:
# 单因子:分组回测

def backtest_group(factor, bins_n):
    if factor not in globals().keys():
        print("因子"+factor+"不存在可用数据,无法回测")
        return 0
    globals()[factor+'_t'] = globals()[factor].T
    labels = ['第' + str(i + 1) + '分位' for i in range(bins_n)]
    globals()[factor + '_t_groups'] = pd.DataFrame(index=globals()[factor+'_t'].index)
    globals()[factor + '_groups_return'] = pd.DataFrame(index=globals()[factor + '_t'].columns, columns=labels+['基准组合'])
    globals()[factor + '_groups_value'] = pd.DataFrame(index=globals()[factor + '_t'].columns, columns=labels+['基准组合'])
    globals()[factor + '_groups_value'].iloc[0] = 1.0000
    pre_value = globals()[factor + '_groups_value'].iloc[0]
    i=0
    for trade_date in globals()[factor+'_t'].columns[1:]:
        globals()[factor + '_t_groups'][trade_date] = pd.qcut(globals()[factor+'_t'][globals()[factor+'_t'].columns[i]],bins_n,labels=labels)
        for bin in labels:
            stock_for_bin = list(globals()[factor + '_t_groups'][globals()[factor + '_t_groups'][trade_date]==bin].index)
            globals()[factor + '_groups_return'].loc[trade_date,bin] = globals()['stock_return'].loc[trade_date,stock_for_bin].mean()
            globals()[factor + '_groups_value'].loc[trade_date,bin] = pre_value[bin] * (1 + globals()[factor + '_groups_return'].loc[trade_date,bin])
        globals()[factor + '_groups_return'].loc[trade_date,'基准组合'] = globals()['stock_return'].loc[trade_date].mean()
        globals()[factor + '_groups_value'].loc[trade_date,'基准组合'] = pre_value['基准组合'] * (1 + globals()[factor + '_groups_return'].loc[trade_date,'基准组合'])
        pre_value = globals()[factor + '_groups_value'].loc[trade_date]
        i+=1
    fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(10,5), gridspec_kw={'left':0.06, 'bottom':0.14, 'right':0.965})
    s = 0
    for tp in ['return','value']:
        globals()[factor + f'_groups_{tp}'].plot(ax=axes[s],kind='line')
        axes[s].set_title(factor + f'因子分组回测(因子组合{tp})')
        s += 1
    fig.suptitle(factor + '因子分组测试')
    plt.show()
    return 0
In [4]:
# 单因子:行业、市值中性化,IC回测

def factor_zscore_neutral(factor):
    factor_data = globals()[factor].apply(zscore, axis=1)
    mv_data = globals()['mv'].copy()
    if globals()[factor].shape[1] != mv_data.shape[1]:
        mv_data = mv_data[globals()[factor].columns]
    factor_neutral = pd.DataFrame(index=factor_data.index, columns=factor_data.columns)
    global industry
    industry = pd.read_csv('industry.csv')
    industry.set_index('stock_code',inplace=True)
    industry.drop(columns=['stock_name','sw_industry_code'],inplace=True)
    industry_dummy = pd.get_dummies(globals()['industry']['sw_industry_name'],drop_first=True)
    for trade_date in factor_data.index:
        regression = pd.DataFrame(index=factor_data.columns)
        regression['factor'] = factor_data.loc[trade_date].tolist()
        regression['mv'] = mv_data.loc[trade_date].tolist()
        regression = pd.concat([regression,industry_dummy],axis=1,join='inner')
        if factor == 'mv':
            X = regression[list(industry_dummy.columns)]
        else:
            X = regression[['mv']+list(industry_dummy.columns)]
        X = sm.add_constant(X)
        y = regression['factor']
        model = sm.OLS(y, X.astype(float)).fit()
        factor_neutral.loc[trade_date] = model.resid.tolist()
    return factor_neutral


def backtest_icir(factor):
    factor_data = factor_zscore_neutral(factor)
    factor_data.drop(factor_data.index[-1],axis=0,inplace=True)
    next_return = globals()['stock_return_next'].drop(globals()['stock_return_next'].index[-1],axis=0)
    globals()[factor+'_ic'] = pd.DataFrame(index=factor_data.index,columns=['normal_ic','rank_ic'])
    for trade_date in factor_data.index:
        globals()[factor + '_ic'].loc[trade_date,'normal_ic'], _ = pearsonr(factor_data.loc[trade_date],next_return.loc[trade_date,factor_data.columns])
        globals()[factor + '_ic'].loc[trade_date, 'rank_ic'], _ = spearmanr(factor_data.loc[trade_date],next_return.loc[trade_date,factor_data.columns])
    print(f'【{factor}因子IC分析】')
    for ttype in ['normal', 'rank']:
        print(f'{ttype}_ic均值:'+str(round(globals()[factor+'_ic'][f'{ttype}_ic'].mean(),4))+f',{ttype}_ic标准差:'+str(round(globals()[factor+'_ic'][f'{ttype}_ic'].std(),4))+f',{ttype}_icir:'+str(round(globals()[factor+'_ic'][f'{ttype}_ic'].mean()/globals()[factor+'_ic'][f'{ttype}_ic'].std(),4))+',IC>0占比:'+str(round(len(globals()[factor+'_ic'][f'{ttype}_ic'][globals()[factor+'_ic'][f'{ttype}_ic']>0])/len(globals()[factor+'_ic'][f'{ttype}_ic']),4)))
    fig, ax1 = plt.subplots(figsize=(10,4))
    ax1.plot(globals()[factor + '_ic'].index.to_timestamp(),globals()[factor + '_ic']['normal_ic'],color='b')
    ax1.set_xlabel('TRADE_DT')
    ax1.set_ylabel('normal_ic',color='b')
    ax1.axhline(y=0, color='b', linestyle='--')
    ax2 = ax1.twinx()
    ax2.plot(globals()[factor + '_ic'].index.to_timestamp(),globals()[factor + '_ic']['rank_ic'],color='r')
    ax2.set_ylabel('rank_ic',color='r')
    ax2.axhline(y=0, color='r', linestyle='--')
    plt.title(factor+'因子回测(ic时间序列变化图)')
    plt.show()
    return 0
In [5]:
# 多因子:交叉分组回测

def dual_factor_cross_test(factor1, factor2, bins_n):
    code_list = list(set(globals()[factor1 + '_t'].index.tolist()) & set(globals()[factor2 + '_t'].index.tolist()))
    if factor1 in ['ROETTM', 'CurRt', 'NPPCCutGrRt', 'TotAstTRtTTM', 'NetOCFTOReve', 'CurTotLia', 'OPITPrf']:
        globals()[factor2 + '_t'] = globals()[factor2].T
        globals()[factor2 + '_t'] = globals()[factor2 + '_t'][globals()[factor1 + '_t'].columns]
    if factor2 in ['ROETTM', 'CurRt', 'NPPCCutGrRt', 'TotAstTRtTTM', 'NetOCFTOReve', 'CurTotLia', 'OPITPrf']:
        globals()[factor1 + '_t'] = globals()[factor1].T
        globals()[factor1 + '_t'] = globals()[factor1 + '_t'][globals()[factor2 + '_t'].columns]
    globals()[factor1 + '_t'] = globals()[factor1 + '_t'].loc[code_list]
    globals()[factor1 + '_t'] = globals()[factor1 + '_t'].loc[code_list]
    if factor1 + '_t' not in globals():
        backtest_group(factor1, bins_n)
    if factor2 + '_t' not in globals():
        backtest_group(factor2, bins_n)
    labels1 = [factor1 + str(i + 1) for i in range(bins_n)]
    labels2 = [factor2 + str(i + 1) for i in range(bins_n)]
    sample_df = pd.DataFrame(index=globals()[factor1 + '_t'].columns, columns=labels2+['基准组合'])
    sample_df1 = sample_df.copy()
    sample_df1.iloc[0] = 1.0000
    globals()['res_return_'+factor1+'_'+factor2] = {x: sample_df.copy() for x in labels1}
    globals()['res_value_'+factor1+'_'+factor2] = {x: sample_df1.copy() for x in labels1}
    f1_group = pd.DataFrame(index=globals()[factor1 + '_t'].index)
    i = 0
    for trade_date in globals()[factor1 + '_t'].columns[1:]:
        f1_group[trade_date] = pd.qcut(globals()[factor1 + '_t'][globals()[factor1 + '_t'].columns[i]], bins_n, labels=labels1)
        for bin in labels1:
            stock_for_bin_f1 = f1_group[f1_group[trade_date]==bin].index.tolist()
            f2_group = pd.DataFrame(index=stock_for_bin_f1)
            f2_group[trade_date] = pd.qcut(globals()[factor2 + '_t'].loc[stock_for_bin_f1,globals()[factor1 + '_t'].columns[i]], bins_n, labels=labels2)
            for bin1 in labels2:
                stock_for_bin_f2 = f2_group[f2_group[trade_date]==bin1].index.tolist()
                globals()['res_return_'+factor1+'_'+factor2][bin].loc[trade_date, bin1] = globals()['stock_return'].loc[trade_date, stock_for_bin_f2].mean()
                globals()['res_value_'+factor1+'_'+factor2][bin].loc[trade_date, bin1] = globals()['res_value_'+factor1+'_'+factor2][bin].loc[globals()[factor1 + '_t'].columns[i], bin1] * (1 + globals()['res_return_'+factor1+'_'+factor2][bin].loc[trade_date, bin1])
            globals()['res_return_'+factor1+'_'+factor2][bin].loc[trade_date, '基准组合'] = globals()['stock_return'].loc[trade_date, stock_for_bin_f1].mean()
            globals()['res_value_'+factor1+'_'+factor2][bin].loc[trade_date, '基准组合'] = globals()['res_value_'+factor1+'_'+factor2][bin].loc[globals()[factor1 + '_t'].columns[i], '基准组合'] * (1 + globals()['res_return_'+factor1+'_'+factor2][bin].loc[trade_date, '基准组合'])
        i += 1
    fig, axes = plt.subplots(nrows=2, ncols=bins_n, figsize=(10,6), gridspec_kw={'left':0.036, 'bottom': 0.105, 'right':0.983, 'top': 0.897, 'wspace':0.145, 'hspace': 0.455})
    s = 0
    for tp in ['return', 'value']:
        j = 0
        for bin in labels1:
            globals()[f'res_{tp}_' + factor1 + '_' + factor2][bin].plot(ax=axes[s,j],kind='line')
            axes[s,j].set_title(bin + f'因子组合{tp}时间序列变化图')
            j += 1
        s += 1
    fig.suptitle(factor1 + '因子&' + factor2 + '因子交叉分组测试')
    plt.show()
    return 0
In [6]:
# 多因子:相关系数

def get_corr(factor1, factor2):
    corr = []
    date_list = list(set(globals()[factor1 + '_t'].columns.tolist()) & set(globals()[factor2 + '_t'].columns.tolist()))
    code_list = list(set(globals()[factor1 + '_t'].index.tolist()) & set(globals()[factor2 + '_t'].index.tolist()))
    for trade_date in date_list:
        corr.append(pearsonr(globals()[factor1 + '_t'].loc[code_list,trade_date], globals()[factor2 + '_t'].loc[code_list,trade_date]))
    globals()['corr_matrix'].loc[factor1,factor2] = np.mean(corr)
    globals()['corr_matrix'].loc[factor2, factor1] = np.mean(corr)
    print(factor1 + '因子&' + factor2 + '因子相关系数: ' + str(round(np.mean(corr), 4)))
    return np.mean(corr)
In [7]:
# 因子库回测

def test_n_corsstest(factor_to_test, bins_n_single, bins_n_dual):
    for factor in factor_to_test:
        backtest_group(factor, bins_n_single)
        backtest_icir(factor)
    for group in combinations(factor_to_test, 2):
        get_corr(list(group)[0], list(group)[1])
        dual_factor_cross_test(list(group)[0], list(group)[1], bins_n_dual)
        dual_factor_cross_test(list(group)[1], list(group)[0], bins_n_dual)
    globals()['corr_matrix'] = globals()['corr_matrix'].astype(float)
    plt.figure(figsize=(10, 8))
    sns.heatmap(globals()['corr_matrix'], annot=True, cmap='YlGnBu', fmt=".2f", square=True)
    plt.title('因子相关系数矩阵')
    plt.xlabel('Factors')
    plt.ylabel('Factors')
    plt.show()
In [8]:
# 构建因子数据库

dt = DataConstruct('div_datas/')
dt.print_all()
{adj_close} (159 * 1972):
          000001.SZ  000002.SZ  000006.SZ  000009.SZ  000011.SZ
TRADE_DT                                                       
2010-12      566.95     920.12     116.76      65.03      21.76
2011-01      549.71     917.88     119.11      68.87      22.33
2011-02      571.98     913.40     120.95      84.85      24.08
2011-03      577.36     972.73     131.34      70.54      24.62
2011-04      653.48     949.22     130.16      67.75      29.59


{close} (159 * 1972):
          000001.SZ  000002.SZ  000006.SZ  000009.SZ  000011.SZ
TRADE_DT                                                       
2010-12       15.79       8.22       6.97      16.77       6.83
2011-01       15.31       8.20       7.11      17.76       7.01
2011-02       15.93       8.16       7.22      21.88       7.56
2011-03       16.08       8.69       7.84      18.19       7.73
2011-04       18.20       8.48       7.77      17.47       9.29


{div_12m} (159 * 1972):
          000001.SZ  000002.SZ  000006.SZ  000009.SZ  000011.SZ
TRADE_DT                                                       
2010-12      0.7599     0.8516     1.1478     0.2087     3.7258
2011-01      0.7599     0.8537     1.1252     0.1971     3.7258
2011-02      0.7599     0.8578     1.1080     0.1600     3.7258
2011-03      0.7599     0.8055     1.0204     0.1924     3.7258
2011-04      0.7599     0.8255     1.0296     0.2003     3.7258


{mv} (159 * 1972):
          000001.SZ  000002.SZ  000006.SZ  000009.SZ  000011.SZ
TRADE_DT                                                       
2010-12   24.731115  25.227296  22.391463  23.629723  22.127041
2011-01   24.700244  25.224860  22.411350  23.687081  22.153054
2011-02   24.739942  25.219970  22.426703  23.895705  22.228587
2011-03   24.749314  25.282899  22.509087  23.711004  22.250825
2011-04   24.873159  25.258436  22.500118  23.670617  22.434655


{pe_ttm} (159 * 1972):
          000001.SZ  000002.SZ  000006.SZ  000009.SZ  000011.SZ
TRADE_DT                                                       
2010-12    0.111347   0.062448   0.053597   0.017136   0.017665
2011-01    0.114838   0.062601   0.088819   0.016181   0.017212
2011-02    0.113189   0.062908   0.087466   0.013134   0.015960
2011-03    0.112133   0.076225   0.080737   0.016404   0.015609
2011-04    0.112064   0.078955   0.083926   0.018215   0.031607


{ROETTM} (53 * 1972):
S_INFO_WINDCODE  000001.SZ  000002.SZ  000006.SZ  000009.SZ  000011.SZ
TRADE_DT                                                              
2010-12            18.7505    16.4655    19.5847    13.0475    20.0185
2011-03            19.6679    16.1679    19.8346    13.2868    39.3927
2011-06            20.8248    16.1207    20.1171    13.2218    29.6570
2011-09            13.2202    16.1956    23.2231    11.5363    29.3072
2011-12            14.0206    18.1712    15.5064    10.1887    22.7792


{CurRt} (53 * 1925):
S_INFO_WINDCODE  000002.SZ  000006.SZ  000009.SZ  000011.SZ  000012.SZ
TRADE_DT                                                              
2010-12             1.5852     2.0348     2.0401     1.3145     0.7394
2011-03             1.4798     1.8241     2.2538     1.5556     0.8402
2011-06             1.4472     1.7249     2.4856     1.3432     0.6838
2011-09             1.3965     1.5310     2.1409     1.3008     0.6540
2011-12             1.4081     1.3927     1.8372     1.2624     0.6493


{NPPCCutGrRt} (53 * 1972):
S_INFO_WINDCODE  000001.SZ  000002.SZ  000006.SZ  000009.SZ   000011.SZ
TRADE_DT                                                               
2010-12            24.3779    33.4852   110.8195    67.7180     45.9244
2011-03            51.9121    10.0150   218.7476   -51.1087  53264.5773
2011-06            58.5081    11.0732    70.9762   -20.3746    154.1927
2011-09            63.3194    14.3419   149.0837     4.9186    143.6999
2011-12            65.9062    36.9758    -5.3197    -1.9106     60.6875


{TotAstTRtTTM} (53 * 1972):
S_INFO_WINDCODE  000001.SZ  000002.SZ  000006.SZ  000009.SZ  000011.SZ
TRADE_DT                                                              
2010-12             0.0274     0.2871     0.2953     0.3671     0.3456
2011-03             0.0276     0.2667     0.3234     0.3104     0.6348
2011-06             0.0293     0.2559     0.3407     0.3518     0.4964
2011-09             0.0271     0.2447     0.3789     0.3325     0.4647
2011-12             0.0299     0.2805     0.3066     0.3728     0.4393


{NetOCFTOReve} (53 * 1972):
S_INFO_WINDCODE  000001.SZ  000002.SZ  000006.SZ  000009.SZ  000011.SZ
TRADE_DT                                                              
2010-12           120.6634     4.4115    46.4706   -18.2278   -10.2478
2011-03           367.0922   -24.7823    61.4355   -36.0902   -39.5073
2011-06           260.0689    19.1399    73.0443   -14.0752   -38.6337
2011-09            86.2822    -5.1022    58.3608   -26.8802   -32.9223
2011-12           -48.7108     4.7218    46.2896   -20.3717   -25.6621


{CurTotLia} (53 * 1925):
S_INFO_WINDCODE  000002.SZ  000006.SZ  000009.SZ  000011.SZ  000012.SZ
TRADE_DT                                                              
2010-12            80.5028    62.3775    56.0464    84.5849    49.0328
2011-03            84.7000    71.4818    51.9318    83.3135    51.6812
2011-06            84.9579    76.4470    47.8392    94.3155    51.6158
2011-09            87.0344    89.5555    55.1359    95.0460    53.8889
2011-12            87.8920    92.8506    62.2953    94.5521    56.6879


{OPITPrf} (53 * 1972):
S_INFO_WINDCODE  000001.SZ  000002.SZ  000006.SZ  000009.SZ  000011.SZ
TRADE_DT                                                              
2010-12            92.3867    93.2270    95.5654    48.9114    92.4922
2011-03            93.2695    99.3269    99.9263    42.4872    99.9312
2011-06            91.5557    98.5198    94.6570    62.4365    99.7979
2011-09            93.9326    97.7823    97.0782    63.5321    99.9971
2011-12            94.8669    95.3213    98.0730    61.1790   100.3475


{S_RISK_VARIANCE20} (159 * 1972):
S_INFO_WINDCODE  000001.SZ  000002.SZ  000006.SZ  000009.SZ  000011.SZ
TRADE_DT                                                              
2010-12           0.043705   0.241605   0.484243   0.230277   0.205580
2011-01           0.087365   0.211352   0.309248   0.495517   0.441427
2011-02           0.041962   0.063655   0.059765   0.747305   0.067242
2011-03           0.041182   0.061819   0.081115   0.365652   0.214133
2011-04           0.111408   0.059983   0.144665   0.352050   0.163832


{S_RISK_VARIANCE60} (159 * 1972):
S_INFO_WINDCODE  000001.SZ  000002.SZ  000006.SZ  000009.SZ  000011.SZ
TRADE_DT                                                              
2010-12           0.109308   0.201698   0.331925   0.480538   0.162898
2011-01           0.081300   0.205505   0.381887   0.473697   0.276363
2011-02           0.058572   0.167685   0.331332   0.468365   0.240370
2011-03           0.056510   0.111077   0.150993   0.485600   0.233130
2011-04           0.063085   0.054470   0.095470   0.508748   0.139095


{S_RISK_LOSSVARIANCE20} (159 * 1972):
S_INFO_WINDCODE  000001.SZ  000002.SZ  000006.SZ  000009.SZ  000011.SZ
TRADE_DT                                                              
2010-12           0.053823   0.077690   0.162115   0.111925   0.113577
2011-01           0.028297   0.077450   0.168522   0.120645   0.116145
2011-02           0.034408   0.084615   0.110595   0.160458   0.114918
2011-03           0.032953   0.054705   0.083030   0.172730   0.026665
2011-04           0.020040   0.024795   0.040067   0.099190   0.040485


{S_RISK_LOSSVARIANCE60} (159 * 1972):
S_INFO_WINDCODE  000001.SZ  000002.SZ  000006.SZ  000009.SZ  000011.SZ
TRADE_DT                                                              
2010-12           0.061472   0.056778   0.119095   0.119423   0.082443
2011-01           0.041357   0.069182   0.129263   0.117492   0.102078
2011-02           0.042513   0.070825   0.128447   0.126650   0.104862
2011-03           0.035355   0.066420   0.113492   0.125372   0.099017
2011-04           0.034785   0.062015   0.108815   0.118875   0.092660


{stock_return} (159 * 1972):
          000001.SZ  000002.SZ  000006.SZ  000009.SZ  000011.SZ
TRADE_DT                                                       
2010-12         NaN        NaN        NaN        NaN        NaN
2011-01   -0.030399  -0.002433   0.020086   0.059034   0.026354
2011-02    0.040496  -0.004878   0.015471   0.231982   0.078459
2011-03    0.009416   0.064951   0.085873  -0.168647   0.022487
2011-04    0.131841  -0.024166  -0.008929  -0.039582   0.201811


{stock_return_next} (159 * 1972):
          000001.SZ  000002.SZ  000006.SZ  000009.SZ  000011.SZ
TRADE_DT                                                       
2010-12   -0.030399  -0.002433   0.020086   0.059034   0.026354
2011-01    0.040496  -0.004878   0.015471   0.231982   0.078459
2011-02    0.009416   0.064951   0.085873  -0.168647   0.022487
2011-03    0.131841  -0.024166  -0.008929  -0.039582   0.201811
2011-04   -0.029670  -0.075472  -0.092664  -0.071551   0.139935


In [9]:
# 正式回测

factor_to_test = ['div_12m','pe_ttm','mv','S_RISK_VARIANCE60','ROETTM','NPPCCutGrRt','OPITPrf','NetOCFTOReve','CurTotLia','CurRt','TotAstTRtTTM']
globals()['corr_matrix'] = pd.DataFrame(index=factor_to_test, columns=factor_to_test)
test_n_corsstest(factor_to_test, 5, 3)
【div_12m因子IC分析】
normal_ic均值:0.0229,normal_ic标准差:0.0552,normal_icir:0.4143,IC>0占比:0.6709
rank_ic均值:0.0375,rank_ic标准差:0.0763,rank_icir:0.4908,IC>0占比:0.7025
【pe_ttm因子IC分析】
normal_ic均值:0.0152,normal_ic标准差:0.0645,normal_icir:0.2363,IC>0占比:0.6013
rank_ic均值:0.0443,rank_ic标准差:0.0855,rank_icir:0.5177,IC>0占比:0.7025
【mv因子IC分析】
normal_ic均值:-0.0382,normal_ic标准差:0.1307,normal_icir:-0.2926,IC>0占比:0.3354
rank_ic均值:-0.0416,rank_ic标准差:0.1552,rank_icir:-0.2677,IC>0占比:0.3544
【S_RISK_VARIANCE60因子IC分析】
normal_ic均值:-0.0282,normal_ic标准差:0.0948,normal_icir:-0.297,IC>0占比:0.3861
rank_ic均值:-0.0489,rank_ic标准差:0.1113,rank_icir:-0.4396,IC>0占比:0.3291
【ROETTM因子IC分析】
normal_ic均值:0.017,normal_ic标准差:0.0614,normal_icir:0.2763,IC>0占比:0.7115
rank_ic均值:0.0432,rank_ic标准差:0.1217,rank_icir:0.3549,IC>0占比:0.6923
【NPPCCutGrRt因子IC分析】
normal_ic均值:0.0191,normal_ic标准差:0.0276,normal_icir:0.6914,IC>0占比:0.7308
rank_ic均值:0.0495,rank_ic标准差:0.0789,rank_icir:0.627,IC>0占比:0.7692
【OPITPrf因子IC分析】
normal_ic均值:0.0079,normal_ic标准差:0.0264,normal_icir:0.2994,IC>0占比:0.6346
rank_ic均值:0.0147,rank_ic标准差:0.0845,rank_icir:0.174,IC>0占比:0.5192
【NetOCFTOReve因子IC分析】
normal_ic均值:0.0175,normal_ic标准差:0.026,normal_icir:0.673,IC>0占比:0.7885
rank_ic均值:0.0297,rank_ic标准差:0.0607,rank_icir:0.4897,IC>0占比:0.7115
【CurTotLia因子IC分析】
normal_ic均值:-0.0038,normal_ic标准差:0.0359,normal_icir:-0.1072,IC>0占比:0.4615
rank_ic均值:-0.0051,rank_ic标准差:0.0464,rank_icir:-0.1099,IC>0占比:0.4615
【CurRt因子IC分析】
normal_ic均值:-0.0141,normal_ic标准差:0.0569,normal_icir:-0.2478,IC>0占比:0.4423
rank_ic均值:-0.0189,rank_ic标准差:0.0651,rank_icir:-0.2905,IC>0占比:0.3462
【TotAstTRtTTM因子IC分析】
normal_ic均值:0.0116,normal_ic标准差:0.0345,normal_icir:0.3372,IC>0占比:0.6731
rank_ic均值:0.0268,rank_ic标准差:0.0829,rank_icir:0.323,IC>0占比:0.6538
div_12m因子&pe_ttm因子相关系数: 0.1783
div_12m因子&mv因子相关系数: 0.1123
div_12m因子&S_RISK_VARIANCE60因子相关系数: -0.0635
div_12m因子&ROETTM因子相关系数: 0.1091
div_12m因子&NPPCCutGrRt因子相关系数: 0.2291
div_12m因子&OPITPrf因子相关系数: 0.1262
div_12m因子&NetOCFTOReve因子相关系数: 0.1159
div_12m因子&CurTotLia因子相关系数: 0.0003
div_12m因子&CurRt因子相关系数: 0.025
div_12m因子&TotAstTRtTTM因子相关系数: 0.1741
pe_ttm因子&mv因子相关系数: 0.1467
pe_ttm因子&S_RISK_VARIANCE60因子相关系数: -0.0551
pe_ttm因子&ROETTM因子相关系数: 0.1333
pe_ttm因子&NPPCCutGrRt因子相关系数: 0.2229
pe_ttm因子&OPITPrf因子相关系数: 0.0613
pe_ttm因子&NetOCFTOReve因子相关系数: 0.1215
pe_ttm因子&CurTotLia因子相关系数: -0.0393
pe_ttm因子&CurRt因子相关系数: 0.0688
pe_ttm因子&TotAstTRtTTM因子相关系数: 0.0907
mv因子&S_RISK_VARIANCE60因子相关系数: 0.0229
mv因子&ROETTM因子相关系数: 0.1596
mv因子&NPPCCutGrRt因子相关系数: 0.1827
mv因子&OPITPrf因子相关系数: 0.2091
mv因子&NetOCFTOReve因子相关系数: 0.0921
mv因子&CurTotLia因子相关系数: -0.0874
mv因子&CurRt因子相关系数: -0.0545
mv因子&TotAstTRtTTM因子相关系数: 0.1369
S_RISK_VARIANCE60因子&ROETTM因子相关系数: 0.1747
S_RISK_VARIANCE60因子&NPPCCutGrRt因子相关系数: 0.2645
S_RISK_VARIANCE60因子&OPITPrf因子相关系数: 0.2204
S_RISK_VARIANCE60因子&NetOCFTOReve因子相关系数: 0.1824
S_RISK_VARIANCE60因子&CurTotLia因子相关系数: 0.0886
S_RISK_VARIANCE60因子&CurRt因子相关系数: 0.1379
S_RISK_VARIANCE60因子&TotAstTRtTTM因子相关系数: 0.1811
ROETTM因子&NPPCCutGrRt因子相关系数: 0.2745
ROETTM因子&OPITPrf因子相关系数: 0.1998
ROETTM因子&NetOCFTOReve因子相关系数: 0.2177
ROETTM因子&CurTotLia因子相关系数: 0.1532
ROETTM因子&CurRt因子相关系数: 0.2404
ROETTM因子&TotAstTRtTTM因子相关系数: 0.1677
NPPCCutGrRt因子&OPITPrf因子相关系数: 0.2325
NPPCCutGrRt因子&NetOCFTOReve因子相关系数: 0.283
NPPCCutGrRt因子&CurTotLia因子相关系数: 0.2666
NPPCCutGrRt因子&CurRt因子相关系数: 0.3317
NPPCCutGrRt因子&TotAstTRtTTM因子相关系数: 0.2623
OPITPrf因子&NetOCFTOReve因子相关系数: 0.1892
OPITPrf因子&CurTotLia因子相关系数: 0.1872
OPITPrf因子&CurRt因子相关系数: 0.2145
OPITPrf因子&TotAstTRtTTM因子相关系数: 0.1456
NetOCFTOReve因子&CurTotLia因子相关系数: 0.0783
NetOCFTOReve因子&CurRt因子相关系数: 0.1642
NetOCFTOReve因子&TotAstTRtTTM因子相关系数: 0.172
CurTotLia因子&CurRt因子相关系数: 0.2029
CurTotLia因子&TotAstTRtTTM因子相关系数: 0.1356
CurRt因子&TotAstTRtTTM因子相关系数: -0.0033
In [ ]: